pacman::p_load(tidyverse, ggstatsplot, plotly, ggplot2, ggdist, dplyr)
# EDA_Vanessa
# Load the weather-station CSV files ----
# Collect every .csv file below "WeatherStation_Cleaned" (sub-folders
# included) and read them all into one tibble.
weather_list <- list.files(
  path = "WeatherStation_Cleaned",
  pattern = "\\.csv$",
  recursive = TRUE,
  full.names = TRUE
)

# Fail early with a readable message when nothing matched, instead of
# handing an empty vector to read_csv().
if (length(weather_list) == 0L) {
  stop("No .csv files found under 'WeatherStation_Cleaned'.", call. = FALSE)
}

# NOTE(review): the recorded column specification below shows several rainfall
# columns parsed as character; inspect problems(weather) to see why.
weather <- read_csv(weather_list)
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
#> Rows: 163904 Columns: 13
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): Station, Daily Rainfall Total, Highest 30 Min Rainfall, Highest 120...
#> dbl (9): Year, Month, Day, Daily Rainfall Total (mm), Mean Temperature, Maxi...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardise column names and types ----
# Keep the first 13 columns, give the measurement columns (positions 5-13)
# short names, then coerce every column to its working type.
weather <- weather %>%
  select(1:13) %>%
  rename(
    DailyRainfall = 5,
    HighestRainfall30 = 6,
    HighestRainfall60 = 7,
    HighestRainfall120 = 8,
    MeanTemp = 9,
    MaxTemp = 10,
    MinTemp = 11,
    MeanWindSpeed = 12,
    MaxWindSpeed = 13
  ) %>%
  mutate(
    Year = as.integer(Year),
    # Month number -> English abbreviation ("Jan", "Feb", ...).
    Month = month.abb[Month],
    Day = as.integer(Day),
    # Non-numeric entries become NA. The original render recorded
    # "NAs introduced by coercion" for the three HighestRainfall* columns.
    across(DailyRainfall:MaxWindSpeed, as.numeric)
  )

glimpse(weather)
#> Rows: 163,904
#> Columns: 13
#> $ Station <chr> "Admiralty", "Admiralty", "Admiralty", "Admiralty",…
#> $ Year <int> 2009, 2009, 2009, 2009, 2009, 2009, 2009, 2009, 200…
#> $ Month <chr> "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "Jan", "J…
#> $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
#> $ DailyRainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ HighestRainfall30 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ HighestRainfall60 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ HighestRainfall120 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ MeanTemp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ MaxTemp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ MinTemp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ MeanWindSpeed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ MaxWindSpeed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Build a Date column from Year / Month / Day ----
# Month holds English abbreviations taken from month.abb, so it is converted
# back to its month number before parsing. Parsing the abbreviation with "%b"
# depends on the session locale and yields NA dates on non-English systems.
weather$DDate <- as.Date(
  paste(
    weather$Year,
    match(weather$Month, month.abb),
    weather$Day,
    sep = "-"
  ),
  format = "%Y-%m-%d"
)
# Monthly temperature summaries per station ----
# AveMeanTemp: mean of the daily mean temperature in the month.
# MaxMaxTemp / MinMinTemp: highest daily maximum / lowest daily minimum.
# Missing days are ignored for all three statistics; a month with no readings
# at all yields NA for the extremes (rather than -Inf / Inf plus a warning)
# and NaN for the mean.
Temp_month <- weather %>%
  group_by(Station, Year, Month) %>%
  summarise(
    AveMeanTemp = mean(MeanTemp, na.rm = TRUE),
    MaxMaxTemp = if (all(is.na(MaxTemp))) NA_real_ else max(MaxTemp, na.rm = TRUE),
    MinMinTemp = if (all(is.na(MinTemp))) NA_real_ else min(MinTemp, na.rm = TRUE),
    # Same grouping as the summarise() default (Station, Year), stated
    # explicitly so the informational message is not emitted.
    .groups = "drop_last"
  )

# Running month index used as a continuous x axis: January 1981 = 1.
Temp_monthYr <- Temp_month %>%
  mutate(MonthOfYear = match(Month, month.abb) + (Year - 1981) * 12)
# Annual mean temperature with its standard error ----
# n counts only the non-missing daily readings, so it matches the sample that
# mean() and sd() actually use (both drop NAs). The standard error of the mean
# is sd / sqrt(n).
Temp_yr_error <- weather %>%
  group_by(Year) %>%
  summarise(
    n = sum(!is.na(MeanTemp)),
    Temp = mean(MeanTemp, na.rm = TRUE),
    sd = sd(MeanTemp, na.rm = TRUE)
  ) %>%
  mutate(se = sd / sqrt(n))

# Monthly series for the Changi station only (input of the trend plot).
Temp <- Temp_monthYr %>%
  filter(Station == "Changi")
# Trend of monthly mean temperature at Changi ----
# One point per month, coloured by year, with a cubic B-spline trend line.
gg <- ggplot(
  Temp,
  aes(x = MonthOfYear, y = AveMeanTemp, color = factor(Year))
) +
  geom_line(linewidth = 0.1) +
  # `text` is not a ggplot2 aesthetic; it is picked up by ggplotly() for the
  # hover tooltip. The value is rounded so the tooltip stays readable.
  geom_point(aes(text = paste0(
    "Month:", Month,
    "<br>MeanTemp:", round(AveMeanTemp, 2), "°C"
  ))) +
  # One tick per decade: month index 1, 121, ... labelled 1981, 1991, ..., 2021.
  scale_x_continuous(
    breaks = seq(from = 1, by = 120, length.out = 5),
    labels = seq(from = 1981, by = 10, length.out = 5)
  ) +
  labs(
    x = "Year", y = "Monthly mean temperature (°C)", color = "Year",
    title = "Trend of Monthly Mean Temperature at Changi Station from 1981 to 2023",
    subtitle = "Gentle trend line sloping upwards from 1981",
    caption = "Data from Meteorological Service Singapore website"
  ) +
  geom_smooth(
    method = "lm", formula = y ~ splines::bs(x, 3),
    se = FALSE, color = "black"
  ) +
  theme_minimal()
#> Warning in geom_point(aes(text = paste0("Month:", Month, "<br>MeanTemp:", :
#> Ignoring unknown aesthetics: text
# Interactive version of the Changi trend plot ----
# The title is rebuilt with the subtitle as a <sup> line and the caption is
# re-added as an annotation, both taken from the ggplot labels.
ggplotly(gg, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        gg$labels$title, "<br>", "<sup>",
        gg$labels$subtitle, "</sup>"
      ),
      font = list(weight = "bold")
    ),
    showlegend = FALSE,
    # Extra bottom margin so the caption below the axis is not clipped.
    margin = list(b = 90),
    annotations = list(
      text = gg$labels$caption,
      # "paper" coordinates run from 0 to 1 across the plotting area:
      # x = 1 is the right edge, a negative y sits below the x axis.
      xref = "paper", yref = "paper",
      x = 1, y = -0.15,
      xanchor = "right", yanchor = "top",
      showarrow = FALSE
    )
  )
#> Warning: Removed 24 rows containing non-finite values (`stat_smooth()`).
# Linear trend of the annual mean temperature ----
model <- lm(Temp ~ Year, data = Temp_yr_error)
y_intercept <- coef(model)[["(Intercept)"]]
slope_coeff <- coef(model)[["Year"]]

# Intercept for drawing the fitted line on a factor(Year) axis. Discrete
# positions are 1, 2, ..., so position 0 corresponds to the year before the
# first year in the data; the line must take the fitted value of that year at
# x = 0. Assumes the years in Temp_yr_error are consecutive — TODO confirm.
adjust_yintercept <- slope_coeff *
  (min(Temp_yr_error$Year, na.rm = TRUE) - 1) + y_intercept
# 99% confidence interval of the annual mean temperature ----
# Error bars span mean +/- 2.58 standard errors (the 99% normal quantile),
# with the fitted linear trend drawn as a dashed line.
gg <- ggplot(Temp_yr_error) +
  geom_errorbar(
    aes(
      x = factor(Year),
      ymin = Temp - 2.58 * se,
      ymax = Temp + 2.58 * se
    ),
    width = 0.2, colour = "black",
    alpha = 0.9, linewidth = 0.5
  ) +
  # `text` is not a ggplot2 aesthetic; ggplotly() uses it for the tooltip.
  # The tooltip label matches the 2.58 multiplier and the plot title (99%).
  geom_point(
    aes(
      x = factor(Year), y = Temp,
      text = paste0(
        "Year:", `Year`,
        "<br>Avg. Temp:", round(Temp, digits = 2),
        "<br>99% CI:[",
        round((Temp - 2.58 * se), digits = 2), ",",
        round((Temp + 2.58 * se), digits = 2), "]"
      )
    ),
    color = "darkred",
    size = 1.5, alpha = 1
  ) +
  # Unrounded slope so the line does not drift from the fitted model.
  geom_abline(
    slope = slope_coeff,
    intercept = adjust_yintercept,
    color = "blue",
    linetype = "dashed"
  ) +
  # A single label (annotate) instead of one label per data row, with the
  # colour set as a constant rather than mapped inside aes(). %+.4f prints the
  # intercept with an explicit sign.
  annotate(
    "text",
    x = 11, y = 27.8, colour = "blue",
    label = sprintf("Temp=%.4f * Year %+.4f", slope_coeff, y_intercept)
  ) +
  labs(
    x = "Year", y = "Annual mean temperatures (°C)",
    title = "99% Confidence interval of annual mean temperatures by year",
    subtitle = "From 1982 to 2023",
    caption = "Data from Meteorological Service Singapore website"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", size = 12)
  )
#> Warning in geom_point(aes(x = factor(Year), y = Temp, text = paste0("Year:", :
#> Ignoring unknown aesthetics: text
# Interactive version of the confidence-interval plot ----
# Title and subtitle are taken from the ggplot labels; the subtitle is shown
# as a <sup> line under the title.
ggplotly(gg, tooltip = "text") %>%
  layout(
    showlegend = FALSE,
    title = list(
      font = list(weight = "bold"),
      text = paste0(
        gg$labels$title, "<br>", "<sup>",
        gg$labels$subtitle, "</sup>"
      )
    )
  )

# Mean temperature per year and month, averaged over the monthly station
# means (input of the heatmap).
Temp <- summarise(
  group_by(Temp_month, Year, Month),
  MTemp = mean(AveMeanTemp, na.rm = TRUE)
)
#> `summarise()` has grouped output by 'Year'. You can override using the
#> `.groups` argument.
# Heatmap of mean temperature by year and month ----
gg <- ggplot(
  Temp,
  aes(
    x = factor(Month, levels = month.abb),
    y = factor(Year),
    fill = MTemp
  )
) +
  # `text` is not a ggplot2 aesthetic; ggplotly() uses it for the tooltip.
  geom_tile(
    aes(text = paste0(
      Year, "-", Month,
      "<br>Temp:", round(MTemp, 2), "°C"
    )),
    color = "white"
  ) +
  theme_minimal() +
  scale_fill_gradient(
    name = "Temperature",
    low = "sky blue",
    high = "dark blue"
  ) +
  labs(
    x = NULL, y = NULL,
    title = "Mean temperatures by year and month",
    subtitle = "Hotter in more months of 2023 as compared to the other years"
  )
#> Warning in geom_tile(color = "white", aes(text = paste0(Year, "-", Month, :
#> Ignoring unknown aesthetics: text

# The title is rebuilt with the subtitle as a <sup> line, as done for the
# other interactive plots, so the subtitle set in labs() is actually shown.
ggplotly(gg, tooltip = "text") %>%
  layout(
    title = list(
      text = paste0(
        gg$labels$title, "<br>", "<sup>",
        gg$labels$subtitle, "</sup>"
      ),
      font = list(weight = "bold")
    )
  )
# Distribution of daily mean temperature by month ----
# Violin per calendar month, an orange point for each monthly mean, and a
# dashed reference line at the overall mean.
# The overall mean is computed once; mapping it inside aes() made ggplot draw
# one reference line and one label for every row of `weather`.
overall_mean_temp <- mean(weather$MeanTemp, na.rm = TRUE)

gg <- ggplot(
  weather,
  aes(x = factor(Month, levels = month.abb), y = MeanTemp)
) +
  geom_violin(color = "navy", fill = "lightblue") +
  geom_hline(
    yintercept = overall_mean_temp,
    linetype = "dashed", linewidth = 1, colour = "brown"
  ) +
  annotate(
    "text",
    x = 4.5, y = 27.3,
    label = paste0("Mean : ", round(overall_mean_temp, 2), "°C"),
    colour = "brown"
  ) +
  # `text` is not a ggplot2 aesthetic; ggplotly() uses it for the tooltip.
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 3, color = "orange",
    aes(text = paste0("Mean : ", round(after_stat(y), 2), "°C"))
  ) +
  theme_minimal() +
  labs(
    title = "Daily mean temperature across each month from 1981 to 2023",
    subtitle = "November to February are cooler as compared to the rest of the year",
    y = "Daily mean Temperatures (°C)",
    x = "Month",
    caption = "Data from Meteorological Service Singapore website"
  )
#> Warning in stat_summary(fun = mean, geom = "point", shape = 20, size = 3, :
#> Ignoring unknown aesthetics: text
Show the code
# Interactive version of the violin plot ----
# Title and subtitle are taken from the ggplot labels; the subtitle is shown
# as a <sup> line under the title.
ggplotly(gg, tooltip = "text") %>%
  layout(
    title = list(
      font = list(weight = "bold"),
      text = paste0(
        gg$labels$title, "<br>", "<sup>",
        gg$labels$subtitle, "</sup>"
      )
    )
  )
#> Warning: Removed 58654 rows containing non-finite values (`stat_ydensity()`).
#> Warning: Removed 58654 rows containing non-finite values (`stat_summary()`).